# Replication code for Taylor C. Boas and Amy Erica Smith, “Looks Like Me, Thinks Like Me: Descriptive Representation and Opinion Congruence in Brazil.” Latin American Research Review 54, 2 (2019).

# Analysis conducted in R 3.6.0 on MacOS 10.13.6

# NOTE: This file reads the Brazil AmericasBarometer and Brazilian Electoral Panel Study files, merges in external data, and writes new files to the working directory. We recommend running R replication files in the following order; please see readme.txt for details.
# 	1_merge_lapop.R
# 	2_merge_latinobarometro.R
# 	3_recode_reshape.R
# 	4_difference_in_distributions.R
# 	5_regressions.R
# 	6_civil_society_meeting.R
# 	7_mass_descriptives.R
# 	8_elite_descriptives.R
# 	9_mean_differences.R
#	10_difference_in_distributions_ks.R
#	11_elite_sample_simulation.R
#	12_converts_vs_lifelong.R

# Set working directory as appropriate
# setwd('~/Dropbox/brazil_leg_surveys/replication/')

# Clear the workspace and load packages. Please make sure all necessary packages are installed.

# Start from an empty workspace (hidden, dot-prefixed objects included) so that
# nothing left over from an earlier session can affect the merge.
rm(list = ls(all.names = TRUE))

library(XML)      # htmlParse(), readHTMLTable()
library(foreign)  # read.dta()

# Load the crosswalk data from the working directory. The code below reads an
# object named `br` that is not created anywhere else in this script, so it is
# presumably supplied by this file -- confirm against readme.txt.
load("data_rosettastone.RData")

# Load LAPOP AmericasBarometer files. Please visit https://www.vanderbilt.edu/lapop/ to download the AmericasBarometer Brazil survey data (Stata version) for 2006-2007, 2008, 2010, and 2012, and visit https://publications.iadb.org/en/publication/12807/brazilian-electoral-panel-studies-beps to download the 2010 Brazilian Electoral Panel Study data, and put the files in the working directory. We have confirmed that the replication code works with the versions of these files downloaded in early June 2019. If the filenames that you download differ from those below, the data files may have changed, and some revision of the code may be necessary to replicate the published results. 

# Read the four AmericasBarometer waves and the 2010 panel study (Stata format)
# from the working directory. File names must match the downloads exactly.
lapop07 <- read.dta("2138048899brazil_lapop_dims final 2007 v5.dta")
lapop08 <- read.dta("30541815brazil_lapop_dims_2008_final_data_set_v10.dta")
lapop10 <- read.dta("7948266051039660950Brazil_LAPOP_AmericasBarometer 2010 data set  approved v4.dta")
lapop12 <- read.dta("54861031Brazil LAPOP AmericasBarometer 2012 Rev1_W.dta")
beps <- read.dta("BEPSwaves123_long.dta")

# State lookup: take the first table of the saved HTML page and keep only the
# state name ('Estado') and abbreviation ('Sigla') columns, read as character.
# FALSE is spelled out because `F` is an ordinary variable that a loaded
# .RData file could have overwritten.
states <- readHTMLTable(
  htmlParse("ESTADOS E CAPITAIS DO BRASIL.html", encoding = "utf8"),
  colClasses = "character",
  stringsAsFactors = FALSE
)[[1]][, c("Estado", "Sigla")]

# ---- 2006-2007 wave: recover municipality and state, attach TSE code ----

# Primary sampling stratum with unused factor levels dropped.
lapop07$region <- lapop07$ESTRATOPRI[, drop = TRUE]

# BRAMUNICIPIO carries extra trailing tokens after the municipality name.
# Strip, in this order: one trailing space, a trailing three-letter token, and
# a trailing token made of two letters plus one digit.
lapop07$muni <- sub(" $", "", lapop07$BRAMUNICIPIO)
lapop07$muni <- sub(" [A-Z]{3}$", "", lapop07$muni)
lapop07$muni <- sub(" [A-Z]{2}[0-9]{1}$", "", lapop07$muni)

# The two-letter token now ending the string is taken as the state
# abbreviation. Where the pattern does not match, sub() returns the whole
# string, which is longer than two characters and is therefore set to NA.
lapop07$uf <- sub(".* ([A-Z]{2})$", "\\1", lapop07$muni)
lapop07$uf[nchar(lapop07$uf) > 2] <- NA
lapop07$muni <- sub(" [A-Z]{2}$", "", lapop07$muni)

# NOTE(review): the escape \U3e33643c lies above U+10FFFF; confirm that the R
# version in use parses it and that it matches the value stored in the data.
lapop07$muni[lapop07$muni == "FLORIAN\U3e33643cPOLIS"] <- "FLORIANOPOLIS"

# Rows still lacking a state were checked by hand and researched to correct
# errors. NOTE: Provincia is unreliable and does not consistently hold the
# correct state. The next line lists the affected rows for inspection.
unique(lapop07[is.na(lapop07$uf), c("muni", "Provincia", "region")])

# Manual corrections. The state for RIO PRETO must be assigned before the
# municipality is renamed, because the condition tests the old name.
lapop07$uf[lapop07$muni == "RIO PRETO"] <- "AM"
lapop07$muni[lapop07$muni == "RIO PRETO"] <- "RIO PRETO DA EVA"
lapop07$uf[lapop07$muni == "MARABA"] <- "PA"
lapop07$uf[lapop07$muni == "VIGIA"] <- "PA"
lapop07$uf[lapop07$muni == "PRIMAVERA"] <- "PA"
lapop07$uf[lapop07$muni == "SATUBINHA"] <- "MA"
lapop07$uf[lapop07$muni == "IGUARACI"] <- "PE"
lapop07$uf[lapop07$muni == "ITABERABA"] <- "BA"

# Lookup key "<MUNICIPALITY> <UF>" built from the TSE names, upper-cased with
# accents removed. It is computed once here rather than once per respondent.
br_muni_key <- paste(
  chartr(
    "ÁÂÃÀÄÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜÇ",
    "AAAAAEEEEIIIIOOOOOUUUUC",
    toupper(br$mun_tse2006)
  ),
  br$state
)
lapop07_key <- paste(lapop07$muni, lapop07$uf)

# Diagnostic: number of respondents whose key is absent from the lookup.
sum(!lapop07_key %in% br_muni_key)

# Diagnostic: how many lookup rows each respondent's key hits (0 = unmatched,
# more than 1 = ambiguous). lapply() always returns a list, so the tabulated
# lengths stay correct even if every key hits the same number of rows; sapply()
# would simplify that case to a vector or matrix and report 1 for each element.
munimatches07 <- lapply(lapop07_key, function(key) which(br_muni_key == key))
names(munimatches07) <- lapop07_key
table(lengths(munimatches07))

# Attach the TSE municipality code; match() uses the first matching lookup row.
lapop07$tse_code <- br$codetse[match(lapop07_key, br_muni_key)]

# ---- 2008 wave: clean municipality and state, attach TSE code ----

lapop08$region <- lapop08$estratopri
# Held as character so that corrected state abbreviations can be assigned below.
lapop08$uf <- as.character(lapop08$prov)

# Remove one trailing space and any leading spaces from the municipality name.
lapop08$muni <- sub(" $", "", lapop08$municipio)
lapop08$muni <- sub("^ *", "", lapop08$muni)

# NOTE(review): the escape \U3e33643c lies above U+10FFFF; confirm that the R
# version in use parses it and that it matches the value stored in the data.
lapop08$muni[lapop08$muni == "FLORIAN\U3e33643cPOLIS"] <- "FLORIANOPOLIS"
lapop08$muni[lapop08$muni == "RIO PRETO EVA"] <- "RIO PRETO DA EVA"
lapop08$uf[lapop08$muni == "FATIMA DO SUL"] <- "MS" # Miscoding in data file

# Lookup key "<MUNICIPALITY> <UF>" built from the TSE names, upper-cased with
# accents removed. It is computed once here rather than once per respondent.
br_muni_key <- paste(
  chartr(
    "ÁÂÃÀÄÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜÇ",
    "AAAAAEEEEIIIIOOOOOUUUUC",
    toupper(br$mun_tse2006)
  ),
  br$state
)
lapop08_key <- paste(lapop08$muni, lapop08$uf)

# Diagnostic: number of respondents whose key is absent from the lookup.
sum(!lapop08_key %in% br_muni_key)

# Diagnostic: how many lookup rows each respondent's key hits (0 = unmatched,
# more than 1 = ambiguous). lapply() always returns a list, so the tabulated
# lengths stay correct even if every key hits the same number of rows; sapply()
# would simplify that case to a vector or matrix and report 1 for each element.
munimatches08 <- lapply(lapop08_key, function(key) which(br_muni_key == key))
names(munimatches08) <- lapop08_key
table(lengths(munimatches08))

# Attach the TSE municipality code; match() uses the first matching lookup row.
lapop08$tse_code <- br$codetse[match(lapop08_key, br_muni_key)]

# ---- 2010 wave: normalise municipality names, attach TSE code ----

lapop10$region <- lapop10$estratopri
lapop10$uf <- lapop10$braprov

# Convert the municipality names from latin1, then upper-case them and replace
# accented letters with their unaccented equivalents so they can be compared
# with the TSE key built below.
lapop10$muni <- iconv(lapop10$bramunicipio, from = "latin1")
lapop10$muni <- chartr(
  "ÁÂÃÀÄÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜÇ",
  "AAAAAEEEEIIIIOOOOOUUUUC",
  toupper(lapop10$muni)
)

# Manual spelling corrections to individual municipality names.
lapop10$muni[lapop10$muni == "EMBU-GUACU"] <- "EMBU GUACU"
lapop10$muni[lapop10$muni == "SENADOR GUIOMNARD"] <- "SENADOR GUIOMARD"
lapop10$muni[lapop10$muni == "PORTO ESPIRIDIAO"] <- "PORTO ESPERIDIAO"

# Lookup key "<MUNICIPALITY> <UF>" built from the TSE names, upper-cased with
# accents removed. It is computed once here rather than once per respondent.
br_muni_key <- paste(
  chartr(
    "ÁÂÃÀÄÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜÇ",
    "AAAAAEEEEIIIIOOOOOUUUUC",
    toupper(br$mun_tse2006)
  ),
  br$state
)
lapop10_key <- paste(lapop10$muni, lapop10$uf)

# Diagnostic: number of respondents whose key is absent from the lookup.
sum(!lapop10_key %in% br_muni_key)

# Diagnostic: how many lookup rows each respondent's key hits (0 = unmatched,
# more than 1 = ambiguous). lapply() always returns a list, so the tabulated
# lengths stay correct even if every key hits the same number of rows; sapply()
# would simplify that case to a vector or matrix and report 1 for each element.
munimatches10 <- lapply(lapop10_key, function(key) which(br_muni_key == key))
names(munimatches10) <- lapop10_key
table(lengths(munimatches10))

# Attach the TSE municipality code; match() uses the first matching lookup row.
lapop10$tse_code <- br$codetse[match(lapop10_key, br_muni_key)]

# ---- 2010 Brazilian Electoral Panel Study: normalise names, attach TSE code ----

beps$region <- beps$estratopri
beps$uf <- beps$prov

# Convert the municipality names from latin1, then upper-case them and replace
# accented letters with their unaccented equivalents so they can be compared
# with the TSE key built below.
beps$muni <- iconv(beps$municipio, from = "latin1")
beps$muni <- chartr(
  "ÁÂÃÀÄÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜÇ",
  "AAAAAEEEEIIIIOOOOOUUUUC",
  toupper(beps$muni)
)

# Manual corrections: spelling fixes, MOSQUEIRO recoded to BELEM, and every
# respondent in DF assigned to BRASILIA.
beps$muni[beps$muni == "EMBU-GUACU"] <- "EMBU GUACU"
beps$muni[beps$muni == "MOSQUEIRO"] <- "BELEM"
beps$muni[beps$muni == "PORTO ESPIRIDIAO"] <- "PORTO ESPERIDIAO"
beps$muni[beps$uf == "DF"] <- "BRASILIA"

# Lookup key "<MUNICIPALITY> <UF>" built from the TSE names, upper-cased with
# accents removed. It is computed once here rather than once per respondent.
br_muni_key <- paste(
  chartr(
    "ÁÂÃÀÄÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜÇ",
    "AAAAAEEEEIIIIOOOOOUUUUC",
    toupper(br$mun_tse2006)
  ),
  br$state
)
beps_key <- paste(beps$muni, beps$uf)

# Diagnostic: number of respondents whose key is absent from the lookup.
sum(!beps_key %in% br_muni_key)

# Diagnostic: how many lookup rows each respondent's key hits (0 = unmatched,
# more than 1 = ambiguous). lapply() always returns a list, so the tabulated
# lengths stay correct even if every key hits the same number of rows; sapply()
# would simplify that case to a vector or matrix and report 1 for each element.
munimatches.beps <- lapply(beps_key, function(key) which(br_muni_key == key))
names(munimatches.beps) <- beps_key
table(lengths(munimatches.beps))

# Attach the TSE municipality code; match() uses the first matching lookup row.
beps$tse_code <- br$codetse[match(beps_key, br_muni_key)]

# ---- 2012 wave: derive state abbreviation, attach TSE code via IBGE code ----

# Primary sampling stratum with unused factor levels dropped. TRUE is spelled
# out because `T` is an ordinary variable that loaded data could overwrite.
lapop12$region <- lapop12$estratopri[, drop = TRUE]

# Convert state names from latin1, restore the accent on 'Paraíba' so it
# matches the lookup table, and translate state names to abbreviations.
lapop12$prov <- iconv(lapop12$prov, from = "latin1")
lapop12$prov[lapop12$prov == "Paraiba"] <- "Paraíba"
lapop12$uf <- states$Sigla[match(lapop12$prov, states$Estado)]

# Municipio codes are IBGE codes with '15' prefixed to each, except for
# 3550308 (São Paulo), 355038 (apparently São Paulo with a data-entry error),
# and 3515004 (Embu das Artes, SP). 15355030 is apparently São Paulo as well.
# In these cases the authors verified that state is SP and tamano is Capital.

# Remove a single leading '15', then recode the two malformed São Paulo codes.
lapop12$municipio <- as.numeric(sub("^15", "", lapop12$municipio))
lapop12$municipio[lapop12$municipio %in% c(355038, 355030)] <- 3550308

# Look up the TSE code and municipality name by IBGE code.
lapop12$tse_code <- br$codetse[match(lapop12$municipio, br$codeibge)]
lapop12$muni <- br$mun_datafolha[match(lapop12$municipio, br$codeibge)]

# Write each survey data frame, now carrying its TSE municipality code, to its
# own .RData file in the working directory.
save(lapop12, file = "lapop_brazil12.RData")
save(lapop10, file = "lapop_brazil10.RData")
save(lapop08, file = "lapop_brazil08.RData")
save(lapop07, file = "lapop_brazil07.RData")
save(beps,    file = "beps_brazil10.RData")
